if ( dir )
{
memcpy(p, buf, count); /* dir == TRUE: *to* guest */
- mark_dirty(current->domain, mfn);
+ paging_mark_dirty(current->domain, mfn);
}
else
memcpy(buf, p, count); /* dir == FALSE: *from guest */
if ( (p->dir == IOREQ_READ) && p->data_is_ptr )
{
gmfn = get_mfn_from_gpfn(paging_gva_to_gfn(v, p->data));
- mark_dirty(d, gmfn);
+ paging_mark_dirty(d, gmfn);
}
out:
return 1;
}
- /* We should not reach here. Otherwise, P2M table is not correct.*/
- return 0;
+ paging_mark_dirty(current->domain, get_mfn_from_gpfn(gpa >> PAGE_SHIFT));
+ return p2m_set_flags(current->domain, gpa, __PAGE_HYPERVISOR|_PAGE_USER);
}
static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
/* A page table is dirtied when its type count becomes non-zero. */
if ( likely(owner != NULL) )
- mark_dirty(owner, page_to_mfn(page));
+ paging_mark_dirty(owner, page_to_mfn(page));
switch ( type & PGT_type_mask )
{
if ( unlikely(paging_mode_enabled(owner)) )
{
/* A page table is dirtied when its type count becomes zero. */
- mark_dirty(owner, page_to_mfn(page));
+ paging_mark_dirty(owner, page_to_mfn(page));
if ( shadow_mode_refcounts(owner) )
return;
}
/* A page is dirtied when its pin status is set. */
- mark_dirty(d, mfn);
+ paging_mark_dirty(d, mfn);
/* We can race domain destruction (domain_relinquish_resources). */
if ( unlikely(this_cpu(percpu_mm_info).foreign != NULL) )
put_page_and_type(page);
put_page(page);
/* A page is dirtied when its pin status is cleared. */
- mark_dirty(d, mfn);
+ paging_mark_dirty(d, mfn);
}
else
{
set_gpfn_from_mfn(mfn, gpfn);
okay = 1;
- mark_dirty(FOREIGNDOM, mfn);
+ paging_mark_dirty(FOREIGNDOM, mfn);
put_page(mfn_to_page(mfn));
break;
break;
}
- mark_dirty(dom, mfn);
+ paging_mark_dirty(dom, mfn);
/* All is good so make the update. */
gdt_pent = map_domain_page(mfn);
#undef page_to_mfn
#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
+/************************************************/
+/* HAP LOG DIRTY SUPPORT */
+/************************************************/
+/* hap code to call when log-dirty is enabled. Returns 0 if no problem was found. */
+int hap_enable_log_dirty(struct domain *d)
+{
+    hap_lock(d);
+    /* turn on PG_log_dirty bit in paging mode */
+    d->arch.paging.mode |= PG_log_dirty;
+    /* set l1e entries of P2M table to NOT_WRITABLE, so that subsequent
+     * guest writes fault and can be recorded in the dirty bitmap. */
+    p2m_set_flags_global(d, (_PAGE_PRESENT|_PAGE_USER));
+    /* flush TLBs (including global entries) so that no stale writable
+     * translations survive the write-protection above. */
+    flush_tlb_all_pge();
+    hap_unlock(d);
+
+    return 0;
+}
+
+/* hap code to call when log-dirty is disabled. Returns 0 on success, to
+ * match hap_enable_log_dirty() and shadow_disable_log_dirty(): the value
+ * propagates through paging_log_dirty_disable() back to paging_domctl(),
+ * which treats any non-zero return as an error and aborts the
+ * XEN_DOMCTL_SHADOW_OP_OFF path.  (Previously this returned 1, which made
+ * turning log-dirty off on a HAP domain always appear to fail.) */
+int hap_disable_log_dirty(struct domain *d)
+{
+    hap_lock(d);
+    d->arch.paging.mode &= ~PG_log_dirty;
+    /* set l1e entries of P2M table back to normal (writable) mode */
+    p2m_set_flags_global(d, __PAGE_HYPERVISOR|_PAGE_USER);
+    hap_unlock(d);
+
+    return 0;
+}
+
+/* Called from paging_log_dirty_op() on a CLEAN: re-write-protect all guest
+ * memory so that writes made after the bitmap was cleared fault again and
+ * repopulate it.
+ * NOTE(review): unlike hap_{enable,disable}_log_dirty() this runs without
+ * hap_lock -- confirm callers serialize against other HAP operations. */
+void hap_clean_dirty_bitmap(struct domain *d)
+{
+    /* mark physical memory as NOT_WRITABLE and flush the TLB */
+    p2m_set_flags_global(d, (_PAGE_PRESENT|_PAGE_USER));
+    flush_tlb_all_pge();
+}
/************************************************/
/* HAP SUPPORT FUNCTIONS */
/************************************************/
}
}
+ /* initialize log dirty here */
+ paging_log_dirty_init(d, hap_enable_log_dirty, hap_disable_log_dirty,
+ hap_clean_dirty_bitmap);
+
/* allocate P2m table */
if ( mode & PG_translate ) {
rv = p2m_alloc_table(d, hap_alloc_p2m_page, hap_free_p2m_page);
HERE_I_AM;
- if ( unlikely(d == current->domain) ) {
- gdprintk(XENLOG_INFO, "Don't try to do a hap op on yourself!\n");
- return -EINVAL;
- }
-
switch ( sc->op ) {
case XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION:
hap_lock(d);
hap_write_p2m_entry(struct vcpu *v, unsigned long gfn, l1_pgentry_t *p,
l1_pgentry_t new, unsigned int level)
{
- hap_lock(v->domain);
+ int do_locking;
+
+ /* This function can be called from two directions (P2M and log dirty). We
+ * need to make sure this lock has been held or not.
+ */
+ do_locking = !hap_locked_by_me(v->domain);
+
+ if ( do_locking )
+ hap_lock(v->domain);
+
safe_write_pte(p, new);
#if CONFIG_PAGING_LEVELS == 3
/* install P2M in monitor table for PAE Xen */
}
#endif
- hap_unlock(v->domain);
+
+ if ( do_locking )
+ hap_unlock(v->domain);
}
/* Entry points into this mode of the hap code. */
// Returns 0 on error (out of memory)
static int
-set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn)
+set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn, u32 l1e_flags)
{
// XXX -- this might be able to be faster iff current->domain == d
mfn_t table_mfn = pagetable_get_mfn(d->arch.phys_table);
d->arch.p2m.max_mapped_pfn = gfn;
if ( mfn_valid(mfn) )
- entry_content = l1e_from_pfn(mfn_x(mfn), __PAGE_HYPERVISOR|_PAGE_USER);
+ entry_content = l1e_from_pfn(mfn_x(mfn), l1e_flags);
else
entry_content = l1e_empty();
p2m_unlock(d);
return -ENOMEM;
}
-list_add_tail(&p2m_top->list, &d->arch.p2m.pages);
+ list_add_tail(&p2m_top->list, &d->arch.p2m.pages);
p2m_top->count_info = 1;
p2m_top->u.inuse.type_info =
/* Initialise physmap tables for slot zero. Other code assumes this. */
gfn = 0;
-mfn = _mfn(INVALID_MFN);
- if ( !set_p2m_entry(d, gfn, mfn) )
+ mfn = _mfn(INVALID_MFN);
+ if ( !set_p2m_entry(d, gfn, mfn, __PAGE_HYPERVISOR|_PAGE_USER) )
goto error;
for ( entry = d->page_list.next;
(gfn != 0x55555555L)
#endif
&& gfn != INVALID_M2P_ENTRY
- && !set_p2m_entry(d, gfn, mfn) )
+ && !set_p2m_entry(d, gfn, mfn, __PAGE_HYPERVISOR|_PAGE_USER) )
goto error;
}
/* This m2p entry is stale: the domain has another frame in
* this physical slot. No great disaster, but for neatness,
* blow away the m2p entry. */
- set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
+ set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY, __PAGE_HYPERVISOR|_PAGE_USER);
}
if ( test_linear && (gfn <= d->arch.p2m.max_mapped_pfn) )
ASSERT(mfn_x(gfn_to_mfn(d, gfn)) == mfn);
//ASSERT(mfn_to_gfn(d, mfn) == gfn);
- set_p2m_entry(d, gfn, _mfn(INVALID_MFN));
+ set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER);
set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
}
omfn = gfn_to_mfn(d, gfn);
if ( mfn_valid(omfn) )
{
- set_p2m_entry(d, gfn, _mfn(INVALID_MFN));
+ set_p2m_entry(d, gfn, _mfn(INVALID_MFN), __PAGE_HYPERVISOR|_PAGE_USER);
set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
}
}
}
- set_p2m_entry(d, gfn, _mfn(mfn));
+ set_p2m_entry(d, gfn, _mfn(mfn), __PAGE_HYPERVISOR|_PAGE_USER);
set_gpfn_from_mfn(mfn, gfn);
audit_p2m(d);
p2m_unlock(d);
}
+/* This function goes through the P2M table and modifies the l1e flags of
+ * all pages.  Note that the physical base address of each l1e is left
+ * intact.  This can be used for special purposes, such as marking physical
+ * memory as NOT WRITABLE to track dirty pages during live migration.
+ */
+void p2m_set_flags_global(struct domain *d, u32 l1e_flags)
+{
+    unsigned long mfn, gfn;
+    l1_pgentry_t l1e_content;
+    l1_pgentry_t *l1e;
+    l2_pgentry_t *l2e;
+    int i1, i2;
+#if CONFIG_PAGING_LEVELS >= 3
+    l3_pgentry_t *l3e;
+    int i3;
+#if CONFIG_PAGING_LEVELS == 4
+    l4_pgentry_t *l4e;
+    int i4;
+#endif /* CONFIG_PAGING_LEVELS == 4 */
+#endif /* CONFIG_PAGING_LEVELS >= 3 */
+
+    /* Nothing to do for non-translated domains: they have no P2M table. */
+    if ( !paging_mode_translate(d) )
+        return;
+
+    if ( pagetable_get_pfn(d->arch.phys_table) == 0 )
+        return;
+
+    p2m_lock(d);
+
+#if CONFIG_PAGING_LEVELS == 4
+    l4e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
+#elif CONFIG_PAGING_LEVELS == 3
+    l3e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
+#else /* CONFIG_PAGING_LEVELS == 2 */
+    l2e = map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
+#endif
+
+#if CONFIG_PAGING_LEVELS >= 3
+#if CONFIG_PAGING_LEVELS >= 4
+    for ( i4 = 0; i4 < L4_PAGETABLE_ENTRIES; i4++ )
+    {
+        if ( !(l4e_get_flags(l4e[i4]) & _PAGE_PRESENT) )
+        {
+            continue;
+        }
+        l3e = map_domain_page(mfn_x(_mfn(l4e_get_pfn(l4e[i4]))));
+#endif /* now at levels 3 or 4... */
+        for ( i3 = 0;
+              i3 < ((CONFIG_PAGING_LEVELS==4) ? L3_PAGETABLE_ENTRIES : 8);
+              i3++ )
+        {
+            if ( !(l3e_get_flags(l3e[i3]) & _PAGE_PRESENT) )
+            {
+                continue;
+            }
+            l2e = map_domain_page(mfn_x(_mfn(l3e_get_pfn(l3e[i3]))));
+#endif /* all levels... */
+            for ( i2 = 0; i2 < L2_PAGETABLE_ENTRIES; i2++ )
+            {
+                if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
+                {
+                    continue;
+                }
+                l1e = map_domain_page(mfn_x(_mfn(l2e_get_pfn(l2e[i2]))));
+
+                /* Note: gfn is derived from the M2P for each present entry
+                 * below; the previous "i1++, gfn++" loop increment was dead
+                 * (value never used) and read gfn uninitialized when the
+                 * first l1e was not present, so it has been dropped. */
+                for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++ )
+                {
+                    if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
+                        continue;
+                    mfn = l1e_get_pfn(l1e[i1]);
+                    gfn = get_gpfn_from_mfn(mfn);
+                    /* create a new l1e entry using l1e_flags */
+                    l1e_content = l1e_from_pfn(mfn, l1e_flags);
+                    paging_write_p2m_entry(d, gfn, &l1e[i1], l1e_content, 1);
+                }
+                unmap_domain_page(l1e);
+            }
+#if CONFIG_PAGING_LEVELS >= 3
+            unmap_domain_page(l2e);
+        }
+#if CONFIG_PAGING_LEVELS >= 4
+        unmap_domain_page(l3e);
+    }
+#endif
+#endif
+
+#if CONFIG_PAGING_LEVELS == 4
+    unmap_domain_page(l4e);
+#elif CONFIG_PAGING_LEVELS == 3
+    unmap_domain_page(l3e);
+#else /* CONFIG_PAGING_LEVELS == 2 */
+    unmap_domain_page(l2e);
+#endif
+
+    p2m_unlock(d);
+}
+
+/* This function traces through the P2M table and modifies the l1e flags of
+ * a specific gpa (leaving the mapped mfn unchanged).
+ * NOTE(review): returns 1 even when the gfn has no valid mapping and
+ * nothing was changed -- confirm callers expect "always handled".
+ */
+int p2m_set_flags(struct domain *d, paddr_t gpa, u32 l1e_flags)
+{
+    unsigned long gfn;
+    mfn_t mfn;
+
+    p2m_lock(d);
+
+    gfn = gpa >> PAGE_SHIFT;
+    mfn = gfn_to_mfn(d, gfn);
+    /* only rewrite the entry if the gfn currently maps a valid mfn */
+    if ( mfn_valid(mfn) )
+        set_p2m_entry(d, gfn, mfn, l1e_flags);
+
+    p2m_unlock(d);
+
+    return 1;
+}
/*
* Local variables:
#include <asm/shadow.h>
#include <asm/p2m.h>
#include <asm/hap.h>
+#include <asm/guest_access.h>
/* Xen command-line option to enable hardware-assisted paging */
int opt_hap_enabled;
debugtrace_printk("pgdebug: %s(): " _f, __func__, ##_a); \
} while (0)
+/************************************************/
+/* LOG DIRTY SUPPORT */
+/************************************************/
+/* Override macros from asm/page.h to make them work with mfn_t */
+#undef mfn_to_page
+#define mfn_to_page(_m) (frame_table + mfn_x(_m))
+#undef mfn_valid
+#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
+#undef page_to_mfn
+#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
+/* Initialise the log-dirty lock; locker == -1 / "nobody" means "not held". */
+#define log_dirty_lock_init(_d)                                   \
+    do {                                                          \
+        spin_lock_init(&(_d)->arch.paging.log_dirty.lock);        \
+        (_d)->arch.paging.log_dirty.locker = -1;                  \
+        (_d)->arch.paging.log_dirty.locker_function = "nobody";   \
+    } while (0)
+
+/* Take the log-dirty lock, BUG()ing on recursive acquisition by the same
+ * CPU, and record who holds it for debugging. */
+#define log_dirty_lock(_d)                                                   \
+    do {                                                                     \
+        if (unlikely((_d)->arch.paging.log_dirty.locker==current->processor))\
+        {                                                                    \
+            printk("Error: paging log dirty lock held by %s\n",              \
+                   (_d)->arch.paging.log_dirty.locker_function);             \
+            BUG();                                                           \
+        }                                                                    \
+        spin_lock(&(_d)->arch.paging.log_dirty.lock);                        \
+        ASSERT((_d)->arch.paging.log_dirty.locker == -1);                    \
+        (_d)->arch.paging.log_dirty.locker = current->processor;             \
+        (_d)->arch.paging.log_dirty.locker_function = __func__;              \
+    } while (0)
+
+/* Release the log-dirty lock; asserts the releasing CPU is the holder. */
+#define log_dirty_unlock(_d)                                              \
+    do {                                                                  \
+        ASSERT((_d)->arch.paging.log_dirty.locker == current->processor); \
+        (_d)->arch.paging.log_dirty.locker = -1;                          \
+        (_d)->arch.paging.log_dirty.locker_function = "nobody";           \
+        spin_unlock(&(_d)->arch.paging.log_dirty.lock);                   \
+    } while (0)
+
+/* allocate bitmap resources for log dirty: one bit per guest pfn.
+ * Returns 0 on success, -ENOMEM on allocation failure (bitmap_size is
+ * reset to 0 in that case). */
+int paging_alloc_log_dirty_bitmap(struct domain *d)
+{
+    ASSERT(d->arch.paging.log_dirty.bitmap == NULL);
+    /* size in bits, rounded up to a whole number of longs */
+    d->arch.paging.log_dirty.bitmap_size =
+        (domain_get_maximum_gpfn(d) + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
+    d->arch.paging.log_dirty.bitmap =
+        xmalloc_array(unsigned long,
+                      d->arch.paging.log_dirty.bitmap_size / BITS_PER_LONG);
+    if ( d->arch.paging.log_dirty.bitmap == NULL )
+    {
+        d->arch.paging.log_dirty.bitmap_size = 0;
+        return -ENOMEM;
+    }
+    /* start with all pages marked clean (bitmap_size is in bits, hence /8) */
+    memset(d->arch.paging.log_dirty.bitmap, 0,
+           d->arch.paging.log_dirty.bitmap_size/8);
+
+    return 0;
+}
+
+/* free bitmap resources; safe to call when no bitmap is allocated
+ * (the pointer is NULLed after freeing, so double calls are harmless) */
+void paging_free_log_dirty_bitmap(struct domain *d)
+{
+    d->arch.paging.log_dirty.bitmap_size = 0;
+    if ( d->arch.paging.log_dirty.bitmap )
+    {
+        xfree(d->arch.paging.log_dirty.bitmap);
+        d->arch.paging.log_dirty.bitmap = NULL;
+    }
+}
+
+/* Switch a domain into log-dirty mode: allocate the dirty bitmap, then let
+ * the paging-mode-specific hook (shadow or hap, installed by
+ * paging_log_dirty_init()) write-protect guest memory.  The domain is
+ * paused for the duration.  Returns 0 on success, -EINVAL if log-dirty is
+ * already enabled, or the error from bitmap allocation / the mode hook. */
+int paging_log_dirty_enable(struct domain *d)
+{
+    int ret;
+
+    domain_pause(d);
+    log_dirty_lock(d);
+
+    if ( paging_mode_log_dirty(d) )
+    {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    ret = paging_alloc_log_dirty_bitmap(d);
+    if ( ret != 0 )
+    {
+        paging_free_log_dirty_bitmap(d);
+        goto out;
+    }
+
+    /* mode-specific part: sets PG_log_dirty and write-protects memory */
+    ret = d->arch.paging.log_dirty.enable_log_dirty(d);
+    if ( ret != 0 )
+        /* the mode hook failed: drop the bitmap we just allocated */
+        paging_free_log_dirty_bitmap(d);
+
+ out:
+    log_dirty_unlock(d);
+    domain_unpause(d);
+    return ret;
+}
+
+/* Switch a domain out of log-dirty mode via the mode-specific hook, and
+ * free the bitmap once the PG_log_dirty bit is really gone.  The domain is
+ * paused for the duration.  Returns the mode hook's result. */
+int paging_log_dirty_disable(struct domain *d)
+{
+    int ret;
+
+    domain_pause(d);
+    log_dirty_lock(d);
+    ret = d->arch.paging.log_dirty.disable_log_dirty(d);
+    /* only free the bitmap if the mode bit was actually cleared */
+    if ( !paging_mode_log_dirty(d) )
+        paging_free_log_dirty_bitmap(d);
+    log_dirty_unlock(d);
+    domain_unpause(d);
+
+    return ret;
+}
+
+/* Mark a page as dirty in the log-dirty bitmap.  Safe to call with any mfn:
+ * does nothing if log-dirty mode is off, the mfn is invalid, or the mfn is
+ * not part of the domain's pseudo-physical map. */
+void paging_mark_dirty(struct domain *d, unsigned long guest_mfn)
+{
+    unsigned long pfn;
+    mfn_t gmfn;
+
+    gmfn = _mfn(guest_mfn);
+
+    if ( !paging_mode_log_dirty(d) || !mfn_valid(gmfn) )
+        return;
+
+    log_dirty_lock(d);
+
+    ASSERT(d->arch.paging.log_dirty.bitmap != NULL);
+
+    /* We /really/ mean PFN here, even for non-translated guests. */
+    pfn = get_gpfn_from_mfn(mfn_x(gmfn));
+
+    /*
+     * Values with the MSB set denote MFNs that aren't really part of the
+     * domain's pseudo-physical memory map (e.g., the shared info frame).
+     * Nothing to do here...
+     */
+    if ( unlikely(!VALID_M2P(pfn)) )
+        goto out; /* was "return", which leaked the log-dirty lock */
+
+    if ( likely(pfn < d->arch.paging.log_dirty.bitmap_size) )
+    {
+        /* Non-atomic test-and-set is OK: serialized by the lock above. */
+        if ( !__test_and_set_bit(pfn, d->arch.paging.log_dirty.bitmap) )
+        {
+            PAGING_DEBUG(LOGDIRTY,
+                         "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n",
+                         mfn_x(gmfn), pfn, d->domain_id);
+            d->arch.paging.log_dirty.dirty_count++;
+        }
+    }
+    else
+    {
+        PAGING_PRINTK("mark_dirty OOR! "
+                      "mfn=%" PRI_mfn " pfn=%lx max=%x (dom %d)\n"
+                      "owner=%d c=%08x t=%" PRtype_info "\n",
+                      mfn_x(gmfn),
+                      pfn,
+                      d->arch.paging.log_dirty.bitmap_size,
+                      d->domain_id,
+                      (page_get_owner(mfn_to_page(gmfn))
+                       ? page_get_owner(mfn_to_page(gmfn))->domain_id
+                       : -1),
+                      mfn_to_page(gmfn)->count_info,
+                      mfn_to_page(gmfn)->u.inuse.type_info);
+    }
+
+ out:
+    log_dirty_unlock(d);
+}
+
+/* Read a domain's log-dirty bitmap and stats. If the operation is a CLEAN,
+ * clear the bitmap and stats as well.  The domain is paused while the
+ * bitmap is copied/cleared so it cannot dirty pages concurrently. */
+int paging_log_dirty_op(struct domain *d, struct xen_domctl_shadow_op *sc)
+{
+    int i, rv = 0, clean = 0, peek = 1;
+
+    domain_pause(d);
+    log_dirty_lock(d);
+
+    clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
+
+    PAGING_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n",
+                 (clean) ? "clean" : "peek",
+                 d->domain_id,
+                 d->arch.paging.log_dirty.fault_count,
+                 d->arch.paging.log_dirty.dirty_count);
+
+    /* report the counts accumulated since the last CLEAN */
+    sc->stats.fault_count = d->arch.paging.log_dirty.fault_count;
+    sc->stats.dirty_count = d->arch.paging.log_dirty.dirty_count;
+
+    if ( clean )
+    {
+        d->arch.paging.log_dirty.fault_count = 0;
+        d->arch.paging.log_dirty.dirty_count = 0;
+
+        /* We need to further call clean_dirty_bitmap() functions of specific
+         * paging modes (shadow or hap).
+         */
+        d->arch.paging.log_dirty.clean_dirty_bitmap(d);
+    }
+
+    if ( guest_handle_is_null(sc->dirty_bitmap) )
+        /* caller may have wanted just to clean the state or access stats. */
+        peek = 0;
+
+    if ( (peek || clean) && (d->arch.paging.log_dirty.bitmap == NULL) )
+    {
+        rv = -EINVAL; /* perhaps should be ENOMEM? */
+        goto out;
+    }
+
+    /* never copy out more bits than the bitmap actually holds */
+    if ( sc->pages > d->arch.paging.log_dirty.bitmap_size )
+        sc->pages = d->arch.paging.log_dirty.bitmap_size;
+
+#define CHUNK (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
+    for ( i = 0; i < sc->pages; i += CHUNK )
+    {
+        /* bytes in this chunk: CHUNK bits or whatever remains, rounded up */
+        int bytes = ((((sc->pages - i) > CHUNK)
+                      ? CHUNK
+                      : (sc->pages - i)) + 7) / 8;
+
+        if ( likely(peek) )
+        {
+            /* NOTE(review): copies to guest memory while holding the
+             * log-dirty spinlock -- confirm this cannot block/fault badly. */
+            if ( copy_to_guest_offset(
+                sc->dirty_bitmap, i/8,
+                (uint8_t *)d->arch.paging.log_dirty.bitmap + (i/8), bytes) )
+            {
+                rv = -EFAULT;
+                goto out;
+            }
+        }
+
+        if ( clean )
+            memset((uint8_t *)d->arch.paging.log_dirty.bitmap + (i/8), 0, bytes);
+    }
+#undef CHUNK
+
+ out:
+    log_dirty_unlock(d);
+    domain_unpause(d);
+    return rv;
+}
+
+
+/* Install the paging-mode-specific log-dirty callbacks and initialise the
+ * log-dirty lock.  Callers must supply all three function pointers; this is
+ * usually invoked when paging is enabled -- see shadow_enable() and
+ * hap_enable() for reference.
+ */
+void paging_log_dirty_init(struct domain *d,
+                           int (*enable_log_dirty)(struct domain *d),
+                           int (*disable_log_dirty)(struct domain *d),
+                           void (*clean_dirty_bitmap)(struct domain *d))
+{
+    /* We initialize log dirty lock first */
+    log_dirty_lock_init(d);
+
+    d->arch.paging.log_dirty.enable_log_dirty = enable_log_dirty;
+    d->arch.paging.log_dirty.disable_log_dirty = disable_log_dirty;
+    d->arch.paging.log_dirty.clean_dirty_bitmap = clean_dirty_bitmap;
+}
+
+/* This function frees log-dirty bitmap resources; called on domain
+ * teardown (see paging_teardown()). */
+void paging_log_dirty_teardown(struct domain*d)
+{
+    log_dirty_lock(d);
+    paging_free_log_dirty_bitmap(d);
+    log_dirty_unlock(d);
+}
+/************************************************/
+/* CODE FOR PAGING SUPPORT */
+/************************************************/
/* Domain paging struct initialization. */
void paging_domain_init(struct domain *d)
{
int paging_domctl(struct domain *d, xen_domctl_shadow_op_t *sc,
XEN_GUEST_HANDLE(void) u_domctl)
{
+ int rc;
+
+ if ( unlikely(d == current->domain) )
+ {
+ gdprintk(XENLOG_INFO, "Dom %u tried to do a paging op on itself.\n",
+ d->domain_id);
+ return -EINVAL;
+ }
+
+ if ( unlikely(d->is_dying) )
+ {
+ gdprintk(XENLOG_INFO, "Ignoring paging op on dying domain %u\n",
+ d->domain_id);
+ return 0;
+ }
+
+ if ( unlikely(d->vcpu[0] == NULL) )
+ {
+ PAGING_ERROR("Paging op on a domain (%u) with no vcpus\n",
+ d->domain_id);
+ return -EINVAL;
+ }
+
+    /* Code to handle log-dirty. Note that some log-dirty operations
+     * piggy-back on shadow operations. For example, when
+     * XEN_DOMCTL_SHADOW_OP_OFF is called, it first checks whether log-dirty
+     * mode is enabled. If it is, we disable log-dirty and continue with the
+     * shadow code. For this reason, we need to further dispatch the domctl
+     * to the next-level paging code (shadow or hap).
+     */
+    switch ( sc->op )
+    {
+    case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
+        return paging_log_dirty_enable(d);
+
+    case XEN_DOMCTL_SHADOW_OP_ENABLE:
+        if ( sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY )
+            return paging_log_dirty_enable(d);
+        /* Plain enable: break out so the shadow/hap handler below runs.
+         * (Without this break we fell through and returned
+         * paging_log_dirty_op(), never enabling anything.) */
+        break;
+
+    case XEN_DOMCTL_SHADOW_OP_OFF:
+        if ( paging_mode_log_dirty(d) )
+            if ( (rc = paging_log_dirty_disable(d)) != 0 )
+                return rc;
+        /* Log-dirty is now off; continue with the shadow/hap code below
+         * to finish turning paging off, as the comment above describes. */
+        break;
+
+    case XEN_DOMCTL_SHADOW_OP_CLEAN:
+    case XEN_DOMCTL_SHADOW_OP_PEEK:
+        return paging_log_dirty_op(d, sc);
+    }
+
/* Here, dispatch domctl to the appropriate paging code */
if ( opt_hap_enabled && is_hvm_domain(d) )
- return hap_domctl(d, sc, u_domctl);
+ return hap_domctl(d, sc, u_domctl);
else
- return shadow_domctl(d, sc, u_domctl);
+ return shadow_domctl(d, sc, u_domctl);
}
/* Call when destroying a domain */
void paging_teardown(struct domain *d)
{
+ /* clean up log dirty resources. */
+ paging_log_dirty_teardown(d);
+
if ( opt_hap_enabled && is_hvm_domain(d) )
hap_teardown(d);
else
__initcall(shadow_audit_key_init);
#endif /* SHADOW_AUDIT */
-static void sh_free_log_dirty_bitmap(struct domain *d);
-
int _shadow_mode_refcounts(struct domain *d)
{
return shadow_mode_refcounts(d);
int result = 0;
struct page_info *page = mfn_to_page(gmfn);
- sh_mark_dirty(v->domain, gmfn);
+ paging_mark_dirty(v->domain, mfn_x(gmfn));
// Determine which types of shadows are affected, and update each.
//
}
}
+ /* initialize log dirty here */
+ paging_log_dirty_init(d, shadow_enable_log_dirty,
+ shadow_disable_log_dirty, shadow_clean_dirty_bitmap);
+
/* Init the P2M table. Must be done before we take the shadow lock
* to avoid possible deadlock. */
if ( mode & PG_translate )
goto out_unlocked;
}
+
shadow_lock(d);
/* Sanity check again with the lock held */
/* Release the hash table back to xenheap */
if (d->arch.paging.shadow.hash_table)
shadow_hash_teardown(d);
- /* Release the log-dirty bitmap of dirtied pages */
- sh_free_log_dirty_bitmap(d);
/* Should not have any more memory held */
SHADOW_PRINTK("teardown done."
" Shadow pages total = %u, free = %u, p2m=%u\n",
return ret;
}
-static int
-sh_alloc_log_dirty_bitmap(struct domain *d)
-{
- ASSERT(d->arch.paging.shadow.dirty_bitmap == NULL);
- d->arch.paging.shadow.dirty_bitmap_size =
- (domain_get_maximum_gpfn(d) + BITS_PER_LONG) & ~(BITS_PER_LONG - 1);
- d->arch.paging.shadow.dirty_bitmap =
- xmalloc_array(unsigned long,
- d->arch.paging.shadow.dirty_bitmap_size / BITS_PER_LONG);
- if ( d->arch.paging.shadow.dirty_bitmap == NULL )
- {
- d->arch.paging.shadow.dirty_bitmap_size = 0;
- return -ENOMEM;
- }
- memset(d->arch.paging.shadow.dirty_bitmap, 0,
- d->arch.paging.shadow.dirty_bitmap_size/8);
-
- return 0;
-}
-
-static void
-sh_free_log_dirty_bitmap(struct domain *d)
-{
- d->arch.paging.shadow.dirty_bitmap_size = 0;
- if ( d->arch.paging.shadow.dirty_bitmap )
- {
- xfree(d->arch.paging.shadow.dirty_bitmap);
- d->arch.paging.shadow.dirty_bitmap = NULL;
- }
-}
-
-static int shadow_log_dirty_enable(struct domain *d)
-{
- int ret;
-
- domain_pause(d);
- shadow_lock(d);
-
- if ( shadow_mode_log_dirty(d) )
- {
- ret = -EINVAL;
- goto out;
- }
-
- if ( shadow_mode_enabled(d) )
- {
- /* This domain already has some shadows: need to clear them out
- * of the way to make sure that all references to guest memory are
- * properly write-protected */
- shadow_blow_tables(d);
- }
-
-#if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL)
- /* 32bit PV guests on 64bit xen behave like older 64bit linux: they
- * change an l4e instead of cr3 to switch tables. Give them the
- * same optimization */
- if ( is_pv_32on64_domain(d) )
- d->arch.paging.shadow.opt_flags = SHOPT_LINUX_L3_TOPLEVEL;
-#endif
-
- ret = sh_alloc_log_dirty_bitmap(d);
- if ( ret != 0 )
- {
- sh_free_log_dirty_bitmap(d);
- goto out;
- }
-
- ret = shadow_one_bit_enable(d, PG_log_dirty);
- if ( ret != 0 )
- sh_free_log_dirty_bitmap(d);
-
- out:
- shadow_unlock(d);
- domain_unpause(d);
- return ret;
-}
-
-static int shadow_log_dirty_disable(struct domain *d)
-{
- int ret;
-
- domain_pause(d);
- shadow_lock(d);
- ret = shadow_one_bit_disable(d, PG_log_dirty);
- if ( !shadow_mode_log_dirty(d) )
- sh_free_log_dirty_bitmap(d);
- shadow_unlock(d);
- domain_unpause(d);
-
- return ret;
-}
-
/**************************************************************************/
/* P2M map manipulations */
BUG();
}
-
-/* Read a domain's log-dirty bitmap and stats.
- * If the operation is a CLEAN, clear the bitmap and stats as well. */
-static int shadow_log_dirty_op(
- struct domain *d, struct xen_domctl_shadow_op *sc)
+/* Shadow specific code which is called in paging_log_dirty_enable().
+ * Return 0 if no problem found.
+ */
+int shadow_enable_log_dirty(struct domain *d)
{
- int i, rv = 0, clean = 0, peek = 1;
+ int ret;
- domain_pause(d);
+ /* shadow lock is required here */
shadow_lock(d);
-
- clean = (sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN);
-
- SHADOW_DEBUG(LOGDIRTY, "log-dirty %s: dom %u faults=%u dirty=%u\n",
- (clean) ? "clean" : "peek",
- d->domain_id,
- d->arch.paging.shadow.fault_count,
- d->arch.paging.shadow.dirty_count);
-
- sc->stats.fault_count = d->arch.paging.shadow.fault_count;
- sc->stats.dirty_count = d->arch.paging.shadow.dirty_count;
-
- if ( clean )
+ if ( shadow_mode_enabled(d) )
{
- /* Need to revoke write access to the domain's pages again.
- * In future, we'll have a less heavy-handed approach to this,
- * but for now, we just unshadow everything except Xen. */
+ /* This domain already has some shadows: need to clear them out
+ * of the way to make sure that all references to guest memory are
+ * properly write-protected */
shadow_blow_tables(d);
-
- d->arch.paging.shadow.fault_count = 0;
- d->arch.paging.shadow.dirty_count = 0;
}
- if ( guest_handle_is_null(sc->dirty_bitmap) )
- /* caller may have wanted just to clean the state or access stats. */
- peek = 0;
-
- if ( (peek || clean) && (d->arch.paging.shadow.dirty_bitmap == NULL) )
- {
- rv = -EINVAL; /* perhaps should be ENOMEM? */
- goto out;
- }
-
- if ( sc->pages > d->arch.paging.shadow.dirty_bitmap_size )
- sc->pages = d->arch.paging.shadow.dirty_bitmap_size;
-
-#define CHUNK (8*1024) /* Transfer and clear in 1kB chunks for L1 cache. */
- for ( i = 0; i < sc->pages; i += CHUNK )
- {
- int bytes = ((((sc->pages - i) > CHUNK)
- ? CHUNK
- : (sc->pages - i)) + 7) / 8;
+#if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL)
+ /* 32bit PV guests on 64bit xen behave like older 64bit linux: they
+ * change an l4e instead of cr3 to switch tables. Give them the
+ * same optimization */
+ if ( is_pv_32on64_domain(d) )
+ d->arch.paging.shadow.opt_flags = SHOPT_LINUX_L3_TOPLEVEL;
+#endif
+
+ ret = shadow_one_bit_enable(d, PG_log_dirty);
+ shadow_unlock(d);
- if ( likely(peek) )
- {
- if ( copy_to_guest_offset(
- sc->dirty_bitmap, i/8,
- (uint8_t *)d->arch.paging.shadow.dirty_bitmap + (i/8), bytes) )
- {
- rv = -EFAULT;
- goto out;
- }
- }
+ return ret;
+}
- if ( clean )
- memset((uint8_t *)d->arch.paging.shadow.dirty_bitmap + (i/8), 0, bytes);
- }
-#undef CHUNK
+/* shadow-specific code which is called in paging_log_dirty_disable() */
+int shadow_disable_log_dirty(struct domain *d)
+{
+ int ret;
- out:
+ /* shadow lock is required here */
+ shadow_lock(d);
+ ret = shadow_one_bit_disable(d, PG_log_dirty);
shadow_unlock(d);
- domain_unpause(d);
- return rv;
+
+ return ret;
}
-
-/* Mark a page as dirty */
-void sh_mark_dirty(struct domain *d, mfn_t gmfn)
+/* This function is called when we CLEAN log dirty bitmap. See
+ * paging_log_dirty_op() for details.
+ */
+void shadow_clean_dirty_bitmap(struct domain *d)
{
- unsigned long pfn;
- int do_locking;
-
- if ( !shadow_mode_log_dirty(d) || !mfn_valid(gmfn) )
- return;
-
- /* Although this is an externally visible function, we do not know
- * whether the shadow lock will be held when it is called (since it
- * can be called from __hvm_copy during emulation).
- * If the lock isn't held, take it for the duration of the call. */
- do_locking = !shadow_locked_by_me(d);
- if ( do_locking )
- {
- shadow_lock(d);
- /* Check the mode again with the lock held */
- if ( unlikely(!shadow_mode_log_dirty(d)) )
- {
- shadow_unlock(d);
- return;
- }
- }
-
- ASSERT(d->arch.paging.shadow.dirty_bitmap != NULL);
-
- /* We /really/ mean PFN here, even for non-translated guests. */
- pfn = get_gpfn_from_mfn(mfn_x(gmfn));
-
- /*
- * Values with the MSB set denote MFNs that aren't really part of the
- * domain's pseudo-physical memory map (e.g., the shared info frame).
- * Nothing to do here...
- */
- if ( unlikely(!VALID_M2P(pfn)) )
- return;
-
- /* N.B. Can use non-atomic TAS because protected by shadow_lock. */
- if ( likely(pfn < d->arch.paging.shadow.dirty_bitmap_size) )
- {
- if ( !__test_and_set_bit(pfn, d->arch.paging.shadow.dirty_bitmap) )
- {
- SHADOW_DEBUG(LOGDIRTY,
- "marked mfn %" PRI_mfn " (pfn=%lx), dom %d\n",
- mfn_x(gmfn), pfn, d->domain_id);
- d->arch.paging.shadow.dirty_count++;
- }
- }
- else
- {
- SHADOW_PRINTK("mark_dirty OOR! "
- "mfn=%" PRI_mfn " pfn=%lx max=%x (dom %d)\n"
- "owner=%d c=%08x t=%" PRtype_info "\n",
- mfn_x(gmfn),
- pfn,
- d->arch.paging.shadow.dirty_bitmap_size,
- d->domain_id,
- (page_get_owner(mfn_to_page(gmfn))
- ? page_get_owner(mfn_to_page(gmfn))->domain_id
- : -1),
- mfn_to_page(gmfn)->count_info,
- mfn_to_page(gmfn)->u.inuse.type_info);
- }
-
- if ( do_locking ) shadow_unlock(d);
+ shadow_lock(d);
+ /* Need to revoke write access to the domain's pages again.
+ * In future, we'll have a less heavy-handed approach to this,
+ * but for now, we just unshadow everything except Xen. */
+ shadow_blow_tables(d);
+ shadow_unlock(d);
}
-
/**************************************************************************/
/* Shadow-control XEN_DOMCTL dispatcher */
{
int rc, preempted = 0;
- if ( unlikely(d == current->domain) )
- {
- gdprintk(XENLOG_INFO, "Dom %u tried to do a shadow op on itself.\n",
- d->domain_id);
- return -EINVAL;
- }
-
- if ( unlikely(d->is_dying) )
- {
- gdprintk(XENLOG_INFO, "Ignoring shadow op on dying domain %u\n",
- d->domain_id);
- return 0;
- }
-
- if ( unlikely(d->vcpu[0] == NULL) )
- {
- SHADOW_ERROR("Shadow op on a domain (%u) with no vcpus\n",
- d->domain_id);
- return -EINVAL;
- }
-
switch ( sc->op )
{
case XEN_DOMCTL_SHADOW_OP_OFF:
- if ( shadow_mode_log_dirty(d) )
- if ( (rc = shadow_log_dirty_disable(d)) != 0 )
- return rc;
if ( d->arch.paging.mode == PG_SH_enable )
if ( (rc = shadow_test_disable(d)) != 0 )
return rc;
case XEN_DOMCTL_SHADOW_OP_ENABLE_TEST:
return shadow_test_enable(d);
- case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
- return shadow_log_dirty_enable(d);
-
case XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE:
return shadow_enable(d, PG_refcounts|PG_translate);
- case XEN_DOMCTL_SHADOW_OP_CLEAN:
- case XEN_DOMCTL_SHADOW_OP_PEEK:
- return shadow_log_dirty_op(d, sc);
-
case XEN_DOMCTL_SHADOW_OP_ENABLE:
- if ( sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY )
- return shadow_log_dirty_enable(d);
return shadow_enable(d, sc->mode << PG_mode_shift);
case XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION:
}
/* Set the bit(s) */
- sh_mark_dirty(v->domain, gmfn);
+ paging_mark_dirty(v->domain, mfn_x(gmfn));
SHADOW_DEBUG(A_AND_D, "gfn = %" SH_PRI_gfn ", "
"old flags = %#x, new flags = %#x\n",
gfn_x(guest_l1e_get_gfn(*ep)), guest_l1e_get_flags(*ep),
if ( unlikely((level == 1) && shadow_mode_log_dirty(d)) )
{
if ( ft & FETCH_TYPE_WRITE )
- sh_mark_dirty(d, target_mfn);
+ paging_mark_dirty(d, mfn_x(target_mfn));
else if ( !sh_mfn_is_dirty(d, target_mfn) )
sflags &= ~_PAGE_RW;
}
}
perfc_incr(shadow_fault_fixed);
- d->arch.paging.shadow.fault_count++;
+ d->arch.paging.log_dirty.fault_count++;
reset_early_unshadow(v);
done:
else
reset_early_unshadow(v);
- sh_mark_dirty(v->domain, mfn);
+ paging_mark_dirty(v->domain, mfn_x(mfn));
sh_unmap_domain_page(addr);
shadow_audit_tables(v);
else
reset_early_unshadow(v);
- sh_mark_dirty(v->domain, mfn);
+ paging_mark_dirty(v->domain, mfn_x(mfn));
sh_unmap_domain_page(addr);
shadow_audit_tables(v);
else
reset_early_unshadow(v);
- sh_mark_dirty(v->domain, mfn);
+ paging_mark_dirty(v->domain, mfn_x(mfn));
sh_unmap_domain_page(addr);
shadow_audit_tables(v);
{
unsigned long pfn;
ASSERT(shadow_mode_log_dirty(d));
- ASSERT(d->arch.paging.shadow.dirty_bitmap != NULL);
+ ASSERT(d->arch.paging.log_dirty.bitmap != NULL);
/* We /really/ mean PFN here, even for non-translated guests. */
pfn = get_gpfn_from_mfn(mfn_x(gmfn));
if ( likely(VALID_M2P(pfn))
- && likely(pfn < d->arch.paging.shadow.dirty_bitmap_size)
- && test_bit(pfn, d->arch.paging.shadow.dirty_bitmap) )
+ && likely(pfn < d->arch.paging.log_dirty.bitmap_size)
+ && test_bit(pfn, d->arch.paging.log_dirty.bitmap) )
return 1;
return 0;
/* Fast MMIO path heuristic */
int has_fast_mmio_entries;
-
- /* Shadow log-dirty bitmap */
- unsigned long *dirty_bitmap;
- unsigned int dirty_bitmap_size; /* in pages, bit per page */
-
- /* Shadow log-dirty mode stats */
- unsigned int fault_count;
- unsigned int dirty_count;
};
struct shadow_vcpu {
/************************************************/
/* p2m handling */
/************************************************/
-
struct p2m_domain {
/* Lock that protects updates to the p2m */
spinlock_t lock;
/************************************************/
/* common paging data structure */
/************************************************/
-struct paging_domain {
- u32 mode; /* flags to control paging operation */
+struct log_dirty_domain {
+    /* log-dirty lock: protects the bitmap and the stats below.
+     * locker == -1 / "nobody" means the lock is free. */
+    spinlock_t lock;
+    int locker; /* processor that holds the lock */
+    const char *locker_function; /* func that took it */
+
+    /* log-dirty bitmap to record dirty pages, one bit per guest pfn */
+    unsigned long *bitmap;
+    unsigned int bitmap_size; /* in pages, bit per page */
+
+    /* log-dirty mode stats, reset on XEN_DOMCTL_SHADOW_OP_CLEAN */
+    unsigned int fault_count;
+    unsigned int dirty_count;
+
+    /* functions which are paging mode specific (shadow or hap),
+     * installed by paging_log_dirty_init() */
+    int (*enable_log_dirty )(struct domain *d);
+    int (*disable_log_dirty )(struct domain *d);
+    void (*clean_dirty_bitmap )(struct domain *d);
+};
+struct paging_domain {
+ /* flags to control paging operation */
+ u32 mode;
/* extension for shadow paging support */
- struct shadow_domain shadow;
-
- /* Other paging assistance code will have structs here */
- struct hap_domain hap;
+ struct shadow_domain shadow;
+ /* extension for hardware-assited paging */
+ struct hap_domain hap;
+ /* log dirty support */
+ struct log_dirty_domain log_dirty;
};
-
struct paging_vcpu {
/* Pointers to mode-specific entry points. */
struct paging_mode *mode;
#define gnttab_shared_gmfn(d, t, i) \
(mfn_to_gmfn(d, gnttab_shared_mfn(d, t, i)))
-#define gnttab_mark_dirty(d, f) mark_dirty((d), (f))
+#define gnttab_mark_dirty(d, f) paging_mark_dirty((d), (f))
static inline void gnttab_clear_flag(unsigned long nr, uint16_t *addr)
{
void guest_physmap_remove_page(struct domain *d, unsigned long gfn,
unsigned long mfn);
+/* set P2M table l1e flags */
+void p2m_set_flags_global(struct domain *d, u32 l1e_flags);
+
+/* set P2M table l1e flags for a gpa */
+int p2m_set_flags(struct domain *d, paddr_t gpa, u32 l1e_flags);
#endif /* _XEN_P2M_H */
#define paging_mode_translate(_d) ((_d)->arch.paging.mode & PG_translate)
#define paging_mode_external(_d) ((_d)->arch.paging.mode & PG_external)
+/* flags used for paging debug */
+#define PAGING_DEBUG_LOGDIRTY 0
+
/******************************************************************************
* The equivalent for a particular vcpu of a shadowed domain. */
struct shadow_paging_mode shadow;
};
+/*****************************************************************************
+ * Log dirty code */
+
+/* allocate log dirty bitmap resource for recording dirty pages */
+int paging_alloc_log_dirty_bitmap(struct domain *d);
+
+/* free log dirty bitmap resource */
+void paging_free_log_dirty_bitmap(struct domain *d);
+
+/* enable log dirty */
+int paging_log_dirty_enable(struct domain *d);
+
+/* disable log dirty */
+int paging_log_dirty_disable(struct domain *d);
+
+/* log dirty initialization */
+void paging_log_dirty_init(struct domain *d,
+ int (*enable_log_dirty)(struct domain *d),
+ int (*disable_log_dirty)(struct domain *d),
+ void (*clean_dirty_bitmap)(struct domain *d));
+
+/* mark a page as dirty */
+void paging_mark_dirty(struct domain *d, unsigned long guest_mfn);
/*****************************************************************************
* Entry points into the paging-assistance code */
/* Call once all of the references to the domain have gone away */
void shadow_final_teardown(struct domain *d);
-/* Mark a page as dirty in the log-dirty bitmap: called when Xen
- * makes changes to guest memory on its behalf. */
-void sh_mark_dirty(struct domain *d, mfn_t gmfn);
-/* Cleaner version so we don't pepper shadow_mode tests all over the place */
-static inline void mark_dirty(struct domain *d, unsigned long gmfn)
-{
- if ( unlikely(shadow_mode_log_dirty(d)) )
- /* See the comment about locking in sh_mark_dirty */
- sh_mark_dirty(d, _mfn(gmfn));
-}
+/* shadow code to call when log dirty is enabled */
+int shadow_enable_log_dirty(struct domain *d);
+
+/* shadow code to call when log dirty is disabled */
+int shadow_disable_log_dirty(struct domain *d);
+
+/* shadow code to call when bitmap is being cleaned */
+void shadow_clean_dirty_bitmap(struct domain *d);
/* Update all the things that are derived from the guest's CR0/CR3/CR4.
* Called to initialize paging structures if the paging mode